source('../settings/settings.R')
source('commonFunctions.R')
inputFileDrive1 <- str_interp("../data/processed/analysis/TT1_Drive_${drive}_PP_${distPrev}m_${distNext}m.csv", list(drive=1, distPrev=DISTANCE_PREV, distNext=DISTANCE_NEXT))
inputFileDrive2 <- str_interp("../data/processed/analysis/TT1_Drive_${drive}_PP_${distPrev}m_${distNext}m.csv", list(drive=2, distPrev=DISTANCE_PREV, distNext=DISTANCE_NEXT))
inputFileDrive3 <- str_interp("../data/processed/analysis/TT1_Drive_${drive}_PP_${distPrev}m_${distNext}m.csv", list(drive=3, distPrev=DISTANCE_PREV, distNext=DISTANCE_NEXT))
inputFileDrive4 <- str_interp("../data/processed/analysis/TT1_Drive_${drive}_PP_${distPrev}m_${distNext}m.csv", list(drive=4, distPrev=30, distNext=30))
drive1 <- read.csv(inputFileDrive1)
drive2 <- read.csv(inputFileDrive2)
drive3 <- read.csv(inputFileDrive3)
drive4 <- read.csv(inputFileDrive4, stringsAsFactors = T)
set.seed(43)
combinedDf <- cbind(drive4,
drive1$MeanPP_Seg0,
drive2$MeanPP, drive3$MeanPP,
drive2$StdPP, drive3$StdPP,
drive2$MeanPP_SegMax, drive3$MeanPP_SegMax,
drive2$MeanPP_Seg0, drive3$MeanPP_Seg0,
drive2$StdPP_SegMax, drive3$StdPP_SegMax,
drive2$StdPP_Seg0, drive3$StdPP_Seg0,
drive2$MeanPP_AccHigh, drive3$MeanPP_AccHigh,
drive2$X.MeanPP_AccLow, drive3$X.MeanPP_AccLow,
drive2$StdPP_AccHigh, drive3$StdPP_AccHigh,
drive2$StdPP_AccLow, drive3$StdPP_AccLow
)
names(combinedDf) <- c(names(drive4),
"PP_Dev_1_Turning",
"PP_Dev_2", "PP_Dev_3",
"Std_PP_2", "Std_PP_3",
"PP_Dev_2_Straight", "PP_Dev_3_Straight",
"PP_Dev_2_Turning", "PP_Dev_3_Turning",
"Std_PP_2_Straight", "Std_PP_3_Straight",
"Std_PP_2_Turning", "Std_PP_3_Turning",
"Mean_PP_2_AccHigh", "Mean_PP_3_AccHigh",
"Mean_PP_2_AccLow", "Mean_PP_3_AccLow",
"Std_PP_2_AccHigh", "Std_PP_3_AccHigh",
"Std_PP_2_AccLow", "Std_PP_3_AccLow"
)
combinedDf$Subject <- paste0("#", str_pad(combinedDf$Subject, 2, pad="0"))
combinedDf$ActivityEncoded <- factor(ifelse(combinedDf$Activity == "NO", "1", ifelse(combinedDf$Activity == "C", "2", "3")))
# combinedDf$PP_Dev_2_Turning <- ifelse(combinedDf$PP_Dev_2_Turning > 0, combinedDf$PP_Dev_2_Turning, combinedDf$PP_Dev_2_Straight)
combinedDf_NoStressor <- combinedDf[combinedDf$Activity == "NO",]
combinedDf_Cognitive <- combinedDf[combinedDf$Activity == "C",]
combinedDf_Motoric <- combinedDf[combinedDf$Activity == "M",]
combinedDf_NoStressor$Subject <- as.factor(combinedDf_NoStressor$Subject)
combinedDf_Cognitive$Subject <- as.factor(combinedDf_Cognitive$Subject)
combinedDf_Motoric$Subject <- as.factor(combinedDf_Motoric$Subject)
COLOR_NORMAL <- list(color='rgb(120,120,120)')
COLOR_COGNITIVE <- list(color='rgb(158,202,225)')
COLOR_MOTORIC <- list(color='rgb(58,200,225)')
COLOR_FAILURE_PRIOR <- list(color='green')
COLOR_FAILURE <- list(color='red')
COLOR_COGNITIVE_ACC <- list(color='rgb(158,202,225)')
COLOR_MOTORIC_ACC <- list(color='rgb(58,200,225)')
bargap <- 6
yAxis <- list(
title = "Arousal ΔPP [ln°C²]",
range=c(-0.2, 0.6)
)
# Apply Otsu algorithm to select threshold
ppDev <- combinedDf$PP_After # PP_Dev
ppDevArray <- matrix(ppDev, nrow = 1,ncol = length(ppDev))
THRESHOLD_MILD = otsu(ppDevArray, range=c(min(ppDev), max(ppDev))) # Expected Threshold > 0.042
print(paste0('Threshold: ', THRESHOLD_MILD))
[1] "Threshold: 0.12638427734375"
MARKER_LINE_MILD = list(color="blue")
MARKER_LINE_EXTREME = list(color="red")
xAxis = list(
title = "Subject",
ticktext = combinedDf_NoStressor$Subject,
tickvals = seq(1, bargap * nrow(combinedDf_NoStressor), by=bargap),
tickmode = "array"
)
combinedDf_NoStressor$SubjectLevel <- seq(1, bargap * nrow(combinedDf_NoStressor), by=bargap)
fig_NoStressor <- plot_ly(combinedDf_NoStressor, x = ~SubjectLevel, width=1000) %>%
# add_trace(y = ~PP_Dev_2_Straight, name = 'Cognitive - Mean PP (Straight)', marker=COLOR_COGNITIVE) %>%
# add_trace(y = ~PP_Dev_1_Turning, name = 'Normal - Mean PP (Turning)', marker=COLOR_COGNITIVE) %>%
# add_trace(y = ~PP_Dev_2_Turning, name = 'Cognitive - Mean PP (Turning)', marker=COLOR_COGNITIVE) %>%
add_trace(type="bar", y = ~Mean_PP_2_AccHigh, width=1.58, name = 'ΔPP after HA in CD', marker=COLOR_COGNITIVE_ACC) %>%
# add_trace(y = ~Mean_PP_2_AccLow, name = 'Coginitive - Mean PP (Low Accel.)', marker=COLOR_ACC) %>%
# add_trace(y = ~PP_Dev_3_Straight, name = 'Motoric - Mean PP (Straight)', marker=COLOR_MOTORIC) %>%
# add_trace(y = ~PP_Dev_3_Turning, name = 'Motoric - Mean PP (Turning)', marker=COLOR_MOTORIC) %>%
add_trace(type="bar", y = ~Mean_PP_3_AccHigh, width=1.58, name = "ΔPP after HA in MD", marker=COLOR_MOTORIC_ACC) %>%
# add_trace(y = ~Mean_PP_3_AccLow, name = 'Motoric - Mean PP (Low Accel.)', marker=COLOR_ACC) %>%
# add_trace(y = ~PP_Prior, name = 'Failure - Prior PP', marker=COLOR_FAILURE_PRIOR) %>%
add_trace(type="bar", y = ~PP_After, width=1.58, name = 'ΔPP after the catastrophic event', marker=COLOR_FAILURE) %>%
add_segments(x=-5, xend=bargap * nrow(combinedDf_NoStressor), y = THRESHOLD_MILD, yend = THRESHOLD_MILD, name="Otsu Threshold",
line=list(color="blue", dash = 'dot')) %>%
# add_segments(x="#01", xend="#41", y = THRESHOLD_EXTREME, yend = THRESHOLD_EXTREME, name="Threshold: Extreme Change of PP",
# line=list(color="darkred", dash = 'dot')) %>%
layout(yaxis = yAxis, xaxis=xAxis, barmode = 'group', bargap=5, title=F)
fig_NoStressor <- fig_NoStressor %>% config(mathjax = 'cdn')
htmltools::tagList(fig_NoStressor)
xAxis = list(
title = "Subject",
ticktext = combinedDf_Cognitive$Subject,
tickvals = seq(1, bargap * nrow(combinedDf_Cognitive), by=bargap),
tickmode = "array"
)
combinedDf_Cognitive$SubjectLevel <- seq(1, bargap * nrow(combinedDf_Cognitive), by=bargap)
fig_Cognitive <- plot_ly(combinedDf_Cognitive, x = ~SubjectLevel, width=1000) %>%
# add_trace(y = ~PP_Dev_2_Straight, name = 'Cognitive - Mean PP (Straight)', marker=COLOR_COGNITIVE) %>%
# add_trace(y = ~PP_Dev_1_Turning, name = 'Normal - Mean PP (Turning)', marker=COLOR_COGNITIVE) %>%
# add_trace(y = ~PP_Dev_2_Turning, name = 'Cognitive - Mean PP (Turning)', marker=COLOR_COGNITIVE) %>%
add_trace(type="bar", y = ~Mean_PP_2_AccHigh, width=1.58, name = 'ΔPP after HA in CD', marker=COLOR_COGNITIVE_ACC) %>%
# add_trace(y = ~Mean_PP_2_AccLow, name = 'Coginitive - Mean PP (Low Accel.)', marker=COLOR_ACC) %>%
# add_trace(y = ~PP_Dev_3_Straight, name = 'Motoric - Mean PP (Straight)', marker=COLOR_MOTORIC) %>%
# add_trace(y = ~PP_Dev_3_Turning, name = 'Motoric - Mean PP (Turning)', marker=COLOR_MOTORIC) %>%
add_trace(type="bar", y = ~Mean_PP_3_AccHigh, width=1.58, name = "ΔPP after HA in MD", marker=COLOR_MOTORIC_ACC) %>%
# add_trace(y = ~Mean_PP_3_AccLow, name = 'Motoric - Mean PP (Low Accel.)', marker=COLOR_ACC) %>%
# add_trace(y = ~PP_Prior, name = 'Failure - Prior PP', marker=COLOR_FAILURE_PRIOR) %>%
add_trace(type="bar", y = ~PP_After, width=1.58, name = 'ΔPP after the catastrophic event', marker=COLOR_FAILURE) %>%
add_segments(x=-5, xend=bargap * nrow(combinedDf_NoStressor), y = THRESHOLD_MILD, yend = THRESHOLD_MILD, name="Otsu Threshold",
line=list(color="blue", dash = 'dot')) %>%
# add_segments(x="#01", xend="#41", y = THRESHOLD_EXTREME, yend = THRESHOLD_EXTREME, name="Threshold: Extreme Change of PP",
# line=list(color="darkred", dash = 'dot')) %>%
layout(yaxis = yAxis, xaxis=xAxis, barmode = 'group', bargap=5, title=F)
htmltools::tagList(fig_Cognitive)
xAxis = list(
title = "Subject",
ticktext = combinedDf_Motoric$Subject,
tickvals = seq(1, bargap * nrow(combinedDf_Motoric), by=bargap),
tickmode = "array"
)
combinedDf_Motoric$SubjectLevel <- seq(1, bargap * nrow(combinedDf_Motoric), by=bargap)
fig_Motoric <- plot_ly(combinedDf_Motoric, x = ~SubjectLevel, width=1000) %>%
# add_trace(y = ~PP_Dev_2_Straight, name = 'Cognitive - Mean PP (Straight)', marker=COLOR_COGNITIVE) %>%
# add_trace(y = ~PP_Dev_1_Turning, name = 'Normal - Mean PP (Turning)', marker=COLOR_COGNITIVE) %>%
# add_trace(y = ~PP_Dev_2_Turning, name = 'Cognitive - Mean PP (Turning)', marker=COLOR_COGNITIVE) %>%
add_trace(type="bar", y = ~Mean_PP_2_AccHigh, width=1.58, name = 'ΔPP after HA in CD', marker=COLOR_COGNITIVE_ACC) %>%
# add_trace(y = ~Mean_PP_2_AccLow, name = 'Coginitive - Mean PP (Low Accel.)', marker=COLOR_ACC) %>%
# add_trace(y = ~PP_Dev_3_Straight, name = 'Motoric - Mean PP (Straight)', marker=COLOR_MOTORIC) %>%
# add_trace(y = ~PP_Dev_3_Turning, name = 'Motoric - Mean PP (Turning)', marker=COLOR_MOTORIC) %>%
add_trace(type="bar", y = ~Mean_PP_3_AccHigh, width=1.58, name = "ΔPP after HA in MD", marker=COLOR_MOTORIC_ACC) %>%
# add_trace(y = ~Mean_PP_3_AccLow, name = 'Motoric - Mean PP (Low Accel.)', marker=COLOR_ACC) %>%
# add_trace(y = ~PP_Prior, name = 'Failure - Prior PP', marker=COLOR_FAILURE_PRIOR) %>%
add_trace(type="bar", y = ~PP_After, width=1.58, name = 'ΔPP after the catastrophic event', marker=COLOR_FAILURE) %>%
add_segments(x=-5, xend=bargap * nrow(combinedDf_NoStressor), y = THRESHOLD_MILD, yend = THRESHOLD_MILD, name="Otsu Threshold",
line=list(color="blue", dash = 'dot')) %>%
# add_segments(x="#01", xend="#41", y = THRESHOLD_EXTREME, yend = THRESHOLD_EXTREME, name="Threshold: Extreme Change of PP",
# line=list(color="darkred", dash = 'dot')) %>%
layout(yaxis = yAxis, xaxis=xAxis, barmode = 'group', bargap=5, title=F)
htmltools::tagList(fig_Motoric)
library(nlme)
combinedDf$Subject = as.factor(combinedDf$Subject)
combinedDf$Activity = as.factor(combinedDf$Activity)
combinedDf$PP_Dev_Group = ifelse(combinedDf$PP_Dev > THRESHOLD_MILD, 1, 0)
Linear model with all variables
linearModel1 <- lm(PP_After ~
+ PP_Dev_2_Straight
+ PP_Dev_3_Straight
+ PP_Dev_2_Turning
+ PP_Dev_3_Turning
+ Std_PP_2_Straight
+ Std_PP_3_Straight
+ Std_PP_2_Turning
+ Std_PP_3_Turning
+ PP_Prior
+ factor(ActivityEncoded),
data=combinedDf)
# anova(model)
summary(linearModel1)
Call:
lm(formula = PP_After ~ +PP_Dev_2_Straight + PP_Dev_3_Straight +
PP_Dev_2_Turning + PP_Dev_3_Turning + Std_PP_2_Straight +
Std_PP_3_Straight + Std_PP_2_Turning + Std_PP_3_Turning +
PP_Prior + factor(ActivityEncoded), data = combinedDf)
Residuals:
Min 1Q Median 3Q Max
-0.066469 -0.030574 -0.004234 0.016063 0.091691
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.07677 0.08187 -0.938 0.37286
PP_Dev_2_Straight 0.55986 0.32689 1.713 0.12093
PP_Dev_3_Straight -0.88019 0.41257 -2.133 0.06168 .
PP_Dev_2_Turning -0.27590 0.42870 -0.644 0.53590
PP_Dev_3_Turning 0.68958 0.44285 1.557 0.15386
Std_PP_2_Straight 0.51998 1.23819 0.420 0.68437
Std_PP_3_Straight 1.17270 0.65813 1.782 0.10846
Std_PP_2_Turning -0.39849 1.57661 -0.253 0.80614
Std_PP_3_Turning -0.41772 1.11291 -0.375 0.71610
PP_Prior 0.77406 0.20609 3.756 0.00451 **
factor(ActivityEncoded)2 0.09572 0.06604 1.449 0.18119
factor(ActivityEncoded)3 0.14416 0.05163 2.792 0.02098 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.06346 on 9 degrees of freedom
Multiple R-squared: 0.9231, Adjusted R-squared: 0.8292
F-statistic: 9.824 on 11 and 9 DF, p-value: 0.0009588
plot(linearModel1)




linearModel1 <- lm(PP_After ~
Mean_PP_2_AccHigh
+ Mean_PP_2_AccLow
+ Mean_PP_3_AccHigh
+ Mean_PP_3_AccLow
+ Std_PP_2_AccHigh
+ Std_PP_2_AccLow
+ Std_PP_3_AccHigh
+ Std_PP_3_AccLow
# + PP_Prior
+ factor(ActivityEncoded),
data=combinedDf)
# anova(model)
summary(linearModel1)
Call:
lm(formula = PP_After ~ Mean_PP_2_AccHigh + Mean_PP_2_AccLow +
Mean_PP_3_AccHigh + Mean_PP_3_AccLow + Std_PP_2_AccHigh +
Std_PP_2_AccLow + Std_PP_3_AccHigh + Std_PP_3_AccLow + factor(ActivityEncoded),
data = combinedDf)
Residuals:
Min 1Q Median 3Q Max
-0.103753 -0.053453 0.008069 0.042675 0.072533
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.38487 0.10071 -3.821 0.00337 **
Mean_PP_2_AccHigh 1.92518 0.64384 2.990 0.01357 *
Mean_PP_2_AccLow -1.57985 0.64227 -2.460 0.03369 *
Mean_PP_3_AccHigh 3.09455 0.79809 3.877 0.00307 **
Mean_PP_3_AccLow -2.36870 0.76639 -3.091 0.01143 *
Std_PP_2_AccHigh 4.76271 4.11946 1.156 0.27449
Std_PP_2_AccLow -4.20550 2.84707 -1.477 0.17043
Std_PP_3_AccHigh 1.29380 1.87829 0.689 0.50660
Std_PP_3_AccLow 2.99288 2.18566 1.369 0.20086
factor(ActivityEncoded)2 0.20599 0.05116 4.026 0.00241 **
factor(ActivityEncoded)3 0.15111 0.05411 2.793 0.01903 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.07786 on 10 degrees of freedom
Multiple R-squared: 0.8714, Adjusted R-squared: 0.7428
F-statistic: 6.777 on 10 and 10 DF, p-value: 0.002822
plot(linearModel1)




With Prior
linearModelWPrior <- lm(PP_After ~
Mean_PP_2_AccHigh
+ Mean_PP_2_AccLow
+ Mean_PP_3_AccHigh
+ Mean_PP_3_AccLow
+ Std_PP_2_AccHigh
+ Std_PP_2_AccLow
+ Std_PP_3_AccHigh
+ Std_PP_3_AccLow
+ PP_Prior
+ factor(ActivityEncoded),
data=combinedDf)
# anova(model)
summary(linearModelWPrior)
Call:
lm(formula = PP_After ~ Mean_PP_2_AccHigh + Mean_PP_2_AccLow +
Mean_PP_3_AccHigh + Mean_PP_3_AccLow + Std_PP_2_AccHigh +
Std_PP_2_AccLow + Std_PP_3_AccHigh + Std_PP_3_AccLow + PP_Prior +
factor(ActivityEncoded), data = combinedDf)
Residuals:
Min 1Q Median 3Q Max
-0.052370 -0.020532 0.000148 0.024951 0.048104
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.23196 0.07296 -3.179 0.01120 *
Mean_PP_2_AccHigh 1.42749 0.41884 3.408 0.00777 **
Mean_PP_2_AccLow -1.19436 0.41079 -2.907 0.01738 *
Mean_PP_3_AccHigh 1.51586 0.62870 2.411 0.03918 *
Mean_PP_3_AccLow -1.29049 0.54487 -2.368 0.04202 *
Std_PP_2_AccHigh 6.67707 2.60696 2.561 0.03062 *
Std_PP_2_AccLow -4.23334 1.77260 -2.388 0.04068 *
Std_PP_3_AccHigh -0.84881 1.28095 -0.663 0.52417
Std_PP_3_AccLow 3.09137 1.36101 2.271 0.04925 *
PP_Prior 0.70129 0.17111 4.099 0.00268 **
factor(ActivityEncoded)2 0.12193 0.03789 3.218 0.01052 *
factor(ActivityEncoded)3 0.16048 0.03376 4.753 0.00104 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.04847 on 9 degrees of freedom
Multiple R-squared: 0.9551, Adjusted R-squared: 0.9003
F-statistic: 17.42 on 11 and 9 DF, p-value: 9.605e-05
plot(linearModelWPrior)




linearModel3 <- lm(PP_After ~
Mean_PP_2_AccHigh
+ Mean_PP_2_AccLow
+ Mean_PP_3_AccHigh
+ Mean_PP_3_AccLow
# + PP_Prior
+ factor(ActivityEncoded),
data=combinedDf)
# anova(model)
summary(linearModel3)
Call:
lm(formula = PP_After ~ Mean_PP_2_AccHigh + Mean_PP_2_AccLow +
Mean_PP_3_AccHigh + Mean_PP_3_AccLow + factor(ActivityEncoded),
data = combinedDf)
Residuals:
Min 1Q Median 3Q Max
-0.106576 -0.057424 0.004053 0.045385 0.144030
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.22818 0.06871 -3.321 0.00505 **
Mean_PP_2_AccHigh 1.94837 0.68471 2.846 0.01296 *
Mean_PP_2_AccLow -1.44379 0.68502 -2.108 0.05357 .
Mean_PP_3_AccHigh 3.05982 0.76503 4.000 0.00132 **
Mean_PP_3_AccLow -2.51493 0.72785 -3.455 0.00386 **
factor(ActivityEncoded)2 0.18649 0.05498 3.392 0.00438 **
factor(ActivityEncoded)3 0.19053 0.04764 3.999 0.00132 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.08544 on 14 degrees of freedom
Multiple R-squared: 0.7832, Adjusted R-squared: 0.6903
F-statistic: 8.43 on 6 and 14 DF, p-value: 0.0005323
plot(linearModel3)




# Export the anova table
library(xtable)
lmCoeffs <- summary(linearModel3)$coefficients
lmAnova <- anova(linearModel3)
print(xtable(lmCoeffs, digits=c(0,5,5,5,5)))
% latex table generated in R 3.6.1 by xtable 1.8-4 package
% Wed Jul 15 01:02:05 2020
\begin{table}[ht]
\centering
\begin{tabular}{rrrrr}
\hline
& Estimate & Std. Error & t value & Pr($>$$|$t$|$) \\
\hline
(Intercept) & -0.22818 & 0.06871 & -3.32083 & 0.00505 \\
Mean\_PP\_2\_AccHigh & 1.94837 & 0.68471 & 2.84555 & 0.01296 \\
Mean\_PP\_2\_AccLow & -1.44379 & 0.68502 & -2.10767 & 0.05357 \\
Mean\_PP\_3\_AccHigh & 3.05982 & 0.76503 & 3.99960 & 0.00132 \\
Mean\_PP\_3\_AccLow & -2.51493 & 0.72785 & -3.45527 & 0.00386 \\
factor(ActivityEncoded)2 & 0.18649 & 0.05498 & 3.39189 & 0.00438 \\
factor(ActivityEncoded)3 & 0.19053 & 0.04764 & 3.99910 & 0.00132 \\
\hline
\end{tabular}
\end{table}
print(xtable(lmAnova), digits=c(0,5,5,5,5))
% latex table generated in R 3.6.1 by xtable 1.8-4 package
% Wed Jul 15 01:02:05 2020
\begin{table}[ht]
\centering
\begin{tabular}{lrrrrr}
\hline
& Df & Sum Sq & Mean Sq & F value & Pr($>$F) \\
\hline
Mean\_PP\_2\_AccHigh & 1 & 0.15 & 0.15 & 20.66 & 0.0005 \\
Mean\_PP\_2\_AccLow & 1 & 0.00 & 0.00 & 0.05 & 0.8310 \\
Mean\_PP\_3\_AccHigh & 1 & 0.01 & 0.01 & 1.42 & 0.2535 \\
Mean\_PP\_3\_AccLow & 1 & 0.06 & 0.06 & 8.61 & 0.0109 \\
factor(ActivityEncoded) & 2 & 0.14 & 0.07 & 9.92 & 0.0021 \\
Residuals & 14 & 0.10 & 0.01 & & \\
\hline
\end{tabular}
\end{table}
ppAfter <- combinedDf$PP_After
ppAfterArray <- matrix(ppAfter, nrow = 1,ncol = length(ppAfter))
thresholdPPAfter <- otsu(ppAfterArray, range=c(min(ppAfter), max(ppAfter))) # Expected Threshold > 0.042
print(paste0('Threshold: ', thresholdPPAfter))
[1] "Threshold: 0.12638427734375"
selectedDf <- combinedDf %>% select(
"Subject", "Activity", "PP_After", # "PP_Prior",
"Mean_PP_2_AccHigh", "Mean_PP_3_AccHigh",
"Mean_PP_2_AccLow", "Mean_PP_3_AccLow",
# "Std_PP_2_AccHigh", "Std_PP_3_AccHigh",
# "Std_PP_2_AccLow", "Std_PP_3_AccLow"
)
selectedDf$Subject <- NULL
selectedDf$Activity_NO <- ifelse(selectedDf$Activity == "NO", 1, 0)
selectedDf$Activity_C <- ifelse(selectedDf$Activity == "C", 1, 0)
selectedDf$Activity_M <- ifelse(selectedDf$Activity == "M", 1, 0)
selectedDf$Activity <- NULL
# selectedDf$PP_Dev_1_Turning <- NULL
# selectedDf$Std_PP_2_Straight <- NULL
# selectedDf$Std_PP_2_Turning <- NULL
# selectedDf$Std_PP_3_Straight <- NULL
# selectedDf$Std_PP_3_Turning <- NULL
#
# # According to Linear model
# selectedDf$PP_Dev_2_Straight <- abs(selectedDf$PP_Dev_2_Straight)
# selectedDf$PP_Dev_3_Straight <- abs(selectedDf$PP_Dev_3_Straight)
# selectedDf$PP_Dev_2_Turning <- abs(selectedDf$PP_Dev_2_Turning)
# selectedDf$PP_Dev_3_Turning <- abs(selectedDf$PP_Dev_3_Turning)
# selectedDf$PP_Prior <- abs(selectedDf$PP_Prior) # NULL
selectedDf$Class <- ifelse(selectedDf$PP_After >= thresholdPPAfter, T, F)
selectedDf$PP_After <- NULL
print(names(selectedDf))
[1] "Mean_PP_2_AccHigh" "Mean_PP_3_AccHigh" "Mean_PP_2_AccLow" "Mean_PP_3_AccLow" "Activity_NO" "Activity_C"
[7] "Activity_M" "Class"
# library(mefa)
# combinedDf <- rep(combinedDf, 10)
set.seed(39)
n_folds <- 3
params <- param <- list(objective = "binary:logistic",
booster = "gbtree",
eval_metric = "auc",
eta = 0.1,
max_depth = 10,
alpha = 1,
lambda = 0,
gamma = 0.45,
min_child_weight = 0.3,
subsample = 0.5,
colsample_bytree = 1)
# XGBoost Model
xgb_m <- xgb.cv( params = param,
data = as.matrix(selectedDf %>% select(-Class)) ,
label = selectedDf$Class,
nrounds = 100,
verbose = F,
prediction = T,
maximize = F, # Change this value to F will help to run with more itineration
nfold = n_folds,
metrics = c("auc", "error"),
early_stopping_rounds = 50,
stratified = T,
scale_pos_weight = 1)
# xgb_m$evaluation_log[xgb_m$best_iteration,"test_auc_mean"]
xgb_m$evaluation_log[xgb_m$best_iteration,]
NA
Performance Metrics
# Prediction
selectedDf$clsPred <- round(xgb_m$pred)
computePerformanceResults <- function(sdat){
sdat = sdat[complete.cases(sdat),]
acc = sum(sdat[,1] == sdat[,2])/nrow(sdat)
conf_mat = table(sdat)
specif = conf_mat[1,1]/sum(conf_mat[,1])
sensiv = conf_mat[2,2]/sum(conf_mat[,2])
preci = conf_mat[2,2]/sum(conf_mat[2,])
npv = conf_mat[1,1]/sum(conf_mat[1,])
return(c(acc,specif,sensiv,preci,npv))
}
# Get average performance
performance <- computePerformanceResults(selectedDf %>% select(Class, clsPred))
acc <- performance[1]
prec <- performance[4]
recall <- performance[3]
spec <- performance[2]
npv <- performance[5]
f1 <- (2 * recall * prec) / (recall + prec)
auc <- as.numeric(xgb_m$evaluation_log[xgb_m$best_iteration, "test_auc_mean"])
print(paste("Accuracy=", round(acc, 2)))
[1] "Accuracy= 0.71"
print(paste("Precision=", round(prec, 2)))
[1] "Precision= 0.44"
print(paste("Recall=", round(recall, 2)))
[1] "Recall= 0.8"
print(paste("Specificity=", round(spec, 2)))
[1] "Specificity= 0.69"
print(paste("NPV=", round(npv, 2)))
[1] "NPV= 0.92"
print(paste("F1=", round(f1, 2)))
[1] "F1= 0.57"
print(paste("AUC=", round(auc, 2)))
[1] "AUC= 0.84"
# Importance
bst <- xgboost( params = param,
data = as.matrix(selectedDf %>% select(-c(Class, clsPred))) ,
label = selectedDf$Class,
nrounds = 100,
verbose = F,
prediction = T,
maximize = F, # Change this value to F will help to run with more itineration
nfold = n_folds,
metrics = c("auc", "error"),
early_stopping_rounds = 50,
stratified = T,
scale_pos_weight = 1)
importanceDf <- xgb.importance(colnames(selectedDf %>% select(-c(Class, clsPred))), model = bst)
print(importanceDf)
library(pROC)
dfROC <- pROC::roc(response = ifelse(selectedDf$Class==T, 1, 0),
predictor = round(xgb_m$pred),
levels=c(0, 1), direction = "<")
# it = which.max(xgb_m$evaluation_log$test_auc_mean)
# best.iter = xgb_m$evaluation_log$iter[it]
# best.iter
plot(pROC::roc(response = ifelse(selectedDf$Class==T, 1, 0),
predictor = round(xgb_m$pred),
levels=c(0, 1), direction = "<"),
legacy.axes = TRUE,
main="ROC Curve",
lwd=1.5)

Plot feature importance
yAxis <- list(
title = 'Importance',
range=c(0.0, 1.0)
)
xAxis <- list(
title = ''
)
importanceDf$Feature <- factor(importanceDf$Feature, levels = importanceDf[order(-Gain),]$Feature)
fig_Importance <- plot_ly(importanceDf, x = ~Feature, y = ~Gain, type = 'bar', name = 'Gain', width=600) %>%
add_trace(y = ~Cover, name = 'Cover') %>%
add_trace(y = ~Frequency, name = 'Frequency') %>%
layout(yaxis = yAxis, xaxis=xAxis, barmode = 'group', title="Feature Importance") %>%
config(.Last.value, mathjax = 'cdn')
htmltools::tagList(fig_Importance)
actualCluster <- data.frame(cbind(as.character(combinedDf$Subject), selectedDf$Class))
names(actualCluster) <- c("Subject", "Class")
actualCluster
# actualCluster[order(Class),]
library(factoextra)
library(cluster)
clusteringDf <- combinedDf %>% select("Mean_PP_2_AccHigh", "Mean_PP_3_AccHigh") #(importanceDf$Feature[1:3])
rownames(selectedDf) <- paste0(combinedDf$Subject)
rownames(clusteringDf) <- paste0(combinedDf$Subject)
fit <- kmeans(clusteringDf, 3)
# clusplot(clusteringDf, fit$cluster, color=TRUE, shade=TRUE, labels=2, lines=0)
fviz_cluster(fit, data=selectedDf)

library(dendextend)
NUMBER_OF_CLUSTERS = 4
CLUSTER_COLORS <- c("red", "blue", color_darkpink, color_darkpink)
color_darkpink = "#e75480"
CLUSTER_BRANCH_COLORS <- CLUSTER_COLORS[1:NUMBER_OF_CLUSTERS]
CLUSTER_LABEL_COLORS <- CLUSTER_COLORS[1:NUMBER_OF_CLUSTERS]
behavioralMatrixClustering <- as.matrix(clusteringDf)
rownames(behavioralMatrixClustering) <- paste0(combinedDf$Subject)
distMatrix <- dist(behavioralMatrixClustering, method="manhattan")
hresults <- distMatrix %>% hclust(method="complete")
hc <- hresults %>%
as.dendrogram %>%
set("nodes_cex", NUMBER_OF_CLUSTERS) %>%
set("labels_col", value = CLUSTER_LABEL_COLORS, k=NUMBER_OF_CLUSTERS) %>%
# set("leaves_pch", 19) %>%
# set("leaves_col", value = c("gray"), k=NUMBER_OF_CLUSTERS) %>%
set("branches_k_color", value=CLUSTER_BRANCH_COLORS, k=NUMBER_OF_CLUSTERS)
plot(hc)
legend("topright",
title="Drive=Failure \nChange of Arousal",
legend = c("Exceptional Increase" , "Noticable Increase" , "No-change or Decrease"),
col = c("red", "pink" , "blue"),
pch = c(20,20,20), bty = "n", pt.cex = 1.5, cex = 0.8 ,
text.col = "black", horiz = FALSE, inset = c(0.0, 0.1))

NUMBER_OF_CLUSTERS <- 2
CLUSTER_COLORS <- c("red", "blue", color_darkpink, color_darkpink)
color_darkpink = "#e75480"
CLUSTER_BRANCH_COLORS <- CLUSTER_COLORS[1:NUMBER_OF_CLUSTERS]
CLUSTER_LABEL_COLORS <- CLUSTER_COLORS[1:NUMBER_OF_CLUSTERS]
combinedDf$isM <- ifelse(combinedDf$Activity == "M", 0.1, 0)
combinedDf$isC <- ifelse(combinedDf$Activity == "C", 0.1, 0)
combinedDf$isN <- ifelse(combinedDf$Activity == "NO", 0.1, 0)
behavioralMatrixClustering <- as.matrix(combinedDf %>% select("PP_After", "isM", "isC", "isN"))
rownames(behavioralMatrixClustering) <- paste0(combinedDf$Subject)
distMatrix <- dist(behavioralMatrixClustering, method="manhattan")
hresults <- distMatrix %>% hclust(method="complete")
hc <- hresults %>%
as.dendrogram %>%
set("nodes_cex", NUMBER_OF_CLUSTERS) %>%
set("labels_col", value = CLUSTER_LABEL_COLORS, k=NUMBER_OF_CLUSTERS) %>%
# set("leaves_pch", 19) %>%
# set("leaves_col", value = c("gray"), k=NUMBER_OF_CLUSTERS) %>%
set("branches_k_color", value=CLUSTER_BRANCH_COLORS, k=NUMBER_OF_CLUSTERS)
plot(hc)
legend("topright",
title="Drive=Failure \nChange of Arousal",
legend = c("Exceptional Increase" , "Noticable Increase" , "No-change or Decrease"),
col = c("red", "pink" , "blue"),
pch = c(20,20,20), bty = "n", pt.cex = 1.5, cex = 0.8 ,
text.col = "black", horiz = FALSE, inset = c(0.0, 0.1))

---
title: "R Notebook"
output: html_notebook
---

```{r}
source('../settings/settings.R')
source('commonFunctions.R')
```

```{r}
inputFileDrive1 <- str_interp("../data/processed/analysis/TT1_Drive_${drive}_PP_${distPrev}m_${distNext}m.csv", list(drive=1, distPrev=DISTANCE_PREV, distNext=DISTANCE_NEXT))
inputFileDrive2 <- str_interp("../data/processed/analysis/TT1_Drive_${drive}_PP_${distPrev}m_${distNext}m.csv", list(drive=2, distPrev=DISTANCE_PREV, distNext=DISTANCE_NEXT))
inputFileDrive3 <- str_interp("../data/processed/analysis/TT1_Drive_${drive}_PP_${distPrev}m_${distNext}m.csv", list(drive=3, distPrev=DISTANCE_PREV, distNext=DISTANCE_NEXT))
inputFileDrive4 <- str_interp("../data/processed/analysis/TT1_Drive_${drive}_PP_${distPrev}m_${distNext}m.csv", list(drive=4, distPrev=30, distNext=30))

drive1 <- read.csv(inputFileDrive1)
drive2 <- read.csv(inputFileDrive2)
drive3 <- read.csv(inputFileDrive3)

drive4 <- read.csv(inputFileDrive4, stringsAsFactors = T)
```

```{r}
set.seed(43)
combinedDf <- cbind(drive4, 
                    drive1$MeanPP_Seg0, 
                    drive2$MeanPP, drive3$MeanPP,
                    drive2$StdPP, drive3$StdPP,
                    drive2$MeanPP_SegMax, drive3$MeanPP_SegMax, 
                    drive2$MeanPP_Seg0, drive3$MeanPP_Seg0,
                    drive2$StdPP_SegMax, drive3$StdPP_SegMax, 
                    drive2$StdPP_Seg0, drive3$StdPP_Seg0,
                    drive2$MeanPP_AccHigh, drive3$MeanPP_AccHigh,
                    drive2$X.MeanPP_AccLow, drive3$X.MeanPP_AccLow,
                    drive2$StdPP_AccHigh, drive3$StdPP_AccHigh,
                    drive2$StdPP_AccLow, drive3$StdPP_AccLow
                  )
names(combinedDf) <- c(names(drive4), 
                       "PP_Dev_1_Turning",
                       "PP_Dev_2", "PP_Dev_3", 
                       "Std_PP_2", "Std_PP_3",
                       "PP_Dev_2_Straight", "PP_Dev_3_Straight", 
                       "PP_Dev_2_Turning", "PP_Dev_3_Turning", 
                       "Std_PP_2_Straight", "Std_PP_3_Straight", 
                       "Std_PP_2_Turning", "Std_PP_3_Turning",
                       "Mean_PP_2_AccHigh", "Mean_PP_3_AccHigh",
                       "Mean_PP_2_AccLow", "Mean_PP_3_AccLow",
                       "Std_PP_2_AccHigh", "Std_PP_3_AccHigh",
                       "Std_PP_2_AccLow", "Std_PP_3_AccLow"
                       )

combinedDf$Subject <- paste0("#", str_pad(combinedDf$Subject, 2, pad="0"))
combinedDf$ActivityEncoded <- factor(ifelse(combinedDf$Activity == "NO", "1", ifelse(combinedDf$Activity == "C", "2", "3")))

# combinedDf$PP_Dev_2_Turning <- ifelse(combinedDf$PP_Dev_2_Turning > 0, combinedDf$PP_Dev_2_Turning, combinedDf$PP_Dev_2_Straight)
```

```{r}
combinedDf_NoStressor <- combinedDf[combinedDf$Activity == "NO",]
combinedDf_Cognitive <- combinedDf[combinedDf$Activity == "C",]
combinedDf_Motoric <- combinedDf[combinedDf$Activity == "M",]

combinedDf_NoStressor$Subject <- as.factor(combinedDf_NoStressor$Subject)
combinedDf_Cognitive$Subject <- as.factor(combinedDf_Cognitive$Subject)
combinedDf_Motoric$Subject <- as.factor(combinedDf_Motoric$Subject)
```

```{r}
COLOR_NORMAL <- list(color='rgb(120,120,120)')
COLOR_COGNITIVE <- list(color='rgb(158,202,225)')
COLOR_MOTORIC <- list(color='rgb(58,200,225)')
COLOR_FAILURE_PRIOR <- list(color='green')
COLOR_FAILURE <- list(color='red')
COLOR_COGNITIVE_ACC <- list(color='rgb(158,202,225)')
COLOR_MOTORIC_ACC <- list(color='rgb(58,200,225)')

bargap <- 6
yAxis <- list(
  title = "Arousal ΔPP [ln°C²]",
  range=c(-0.2, 0.6)
)

# Apply Otsu algorithm to select threshold
ppDev <- combinedDf$PP_After # PP_Dev
ppDevArray <- matrix(ppDev, nrow = 1,ncol = length(ppDev))
  
THRESHOLD_MILD = otsu(ppDevArray, range=c(min(ppDev), max(ppDev))) # Expected Threshold > 0.042
print(paste0('Threshold: ', THRESHOLD_MILD))

MARKER_LINE_MILD = list(color="blue")
MARKER_LINE_EXTREME = list(color="red")
```

```{r, warning=F}
xAxis = list(
  title = "Subject",
  ticktext = combinedDf_NoStressor$Subject, 
  tickvals = seq(1, bargap * nrow(combinedDf_NoStressor), by=bargap),
  tickmode = "array"
)
combinedDf_NoStressor$SubjectLevel <- seq(1, bargap * nrow(combinedDf_NoStressor), by=bargap)
      
fig_NoStressor <- plot_ly(combinedDf_NoStressor, x = ~SubjectLevel, width=1000) %>%
  # add_trace(y = ~PP_Dev_2_Straight, name = 'Cognitive - Mean PP (Straight)', marker=COLOR_COGNITIVE) %>%
  # add_trace(y = ~PP_Dev_1_Turning, name = 'Normal - Mean PP (Turning)', marker=COLOR_COGNITIVE) %>% 
  # add_trace(y = ~PP_Dev_2_Turning, name = 'Cognitive - Mean PP (Turning)', marker=COLOR_COGNITIVE) %>% 
  add_trace(type="bar", y = ~Mean_PP_2_AccHigh, width=1.58, name = 'ΔPP after HA in CD', marker=COLOR_COGNITIVE_ACC) %>% 
  # add_trace(y = ~Mean_PP_2_AccLow, name = 'Coginitive - Mean PP (Low Accel.)', marker=COLOR_ACC) %>% 
  
  # add_trace(y = ~PP_Dev_3_Straight, name = 'Motoric - Mean PP (Straight)', marker=COLOR_MOTORIC) %>% 
  # add_trace(y = ~PP_Dev_3_Turning, name = 'Motoric - Mean PP (Turning)', marker=COLOR_MOTORIC) %>% 
  add_trace(type="bar", y = ~Mean_PP_3_AccHigh, width=1.58, name = "ΔPP after HA in MD", marker=COLOR_MOTORIC_ACC) %>% 
  # add_trace(y = ~Mean_PP_3_AccLow, name = 'Motoric - Mean PP (Low Accel.)', marker=COLOR_ACC) %>%
  
  # add_trace(y = ~PP_Prior, name = 'Failure - Prior PP', marker=COLOR_FAILURE_PRIOR) %>%
  add_trace(type="bar", y = ~PP_After, width=1.58, name = 'ΔPP after the catastrophic event', marker=COLOR_FAILURE) %>% 
  add_segments(x=-5, xend=bargap * nrow(combinedDf_NoStressor), y = THRESHOLD_MILD, yend = THRESHOLD_MILD, name="Otsu Threshold",
                           line=list(color="blue", dash = 'dot')) %>%
  # add_segments(x="#01", xend="#41", y = THRESHOLD_EXTREME, yend = THRESHOLD_EXTREME, name="Threshold: Extreme Change of PP",
  #                          line=list(color="darkred", dash = 'dot')) %>%
  layout(yaxis = yAxis, xaxis=xAxis, barmode = 'group', bargap=5, title=F) 

fig_NoStressor <- fig_NoStressor %>% config(mathjax = 'cdn')

htmltools::tagList(fig_NoStressor)
```

```{r, warning=F}
xAxis = list(
  title = "Subject",
  ticktext = combinedDf_Cognitive$Subject, 
  tickvals = seq(1, bargap * nrow(combinedDf_Cognitive), by=bargap),
  tickmode = "array"
)
combinedDf_Cognitive$SubjectLevel <- seq(1, bargap * nrow(combinedDf_Cognitive), by=bargap)

fig_Cognitive <- plot_ly(combinedDf_Cognitive, x = ~SubjectLevel, width=1000) %>%
  # add_trace(y = ~PP_Dev_2_Straight, name = 'Cognitive - Mean PP (Straight)', marker=COLOR_COGNITIVE) %>%
  # add_trace(y = ~PP_Dev_1_Turning, name = 'Normal - Mean PP (Turning)', marker=COLOR_COGNITIVE) %>% 
  # add_trace(y = ~PP_Dev_2_Turning, name = 'Cognitive - Mean PP (Turning)', marker=COLOR_COGNITIVE) %>% 
  add_trace(type="bar", y = ~Mean_PP_2_AccHigh, width=1.58, name = 'ΔPP after HA in CD', marker=COLOR_COGNITIVE_ACC) %>% 
  # add_trace(y = ~Mean_PP_2_AccLow, name = 'Coginitive - Mean PP (Low Accel.)', marker=COLOR_ACC) %>% 
  
  # add_trace(y = ~PP_Dev_3_Straight, name = 'Motoric - Mean PP (Straight)', marker=COLOR_MOTORIC) %>% 
  # add_trace(y = ~PP_Dev_3_Turning, name = 'Motoric - Mean PP (Turning)', marker=COLOR_MOTORIC) %>% 
  add_trace(type="bar", y = ~Mean_PP_3_AccHigh, width=1.58, name = "ΔPP after HA in MD", marker=COLOR_MOTORIC_ACC) %>% 
  # add_trace(y = ~Mean_PP_3_AccLow, name = 'Motoric - Mean PP (Low Accel.)', marker=COLOR_ACC) %>%
  
  # add_trace(y = ~PP_Prior, name = 'Failure - Prior PP', marker=COLOR_FAILURE_PRIOR) %>%
  add_trace(type="bar", y = ~PP_After, width=1.58, name = 'ΔPP after the catastrophic event', marker=COLOR_FAILURE) %>% 
  add_segments(x=-5, xend=bargap * nrow(combinedDf_NoStressor), y = THRESHOLD_MILD, yend = THRESHOLD_MILD, name="Otsu Threshold",
                           line=list(color="blue", dash = 'dot')) %>%
  # add_segments(x="#01", xend="#41", y = THRESHOLD_EXTREME, yend = THRESHOLD_EXTREME, name="Threshold: Extreme Change of PP",
  #                          line=list(color="darkred", dash = 'dot')) %>%
  layout(yaxis = yAxis, xaxis=xAxis, barmode = 'group', bargap=5, title=F)

htmltools::tagList(fig_Cognitive)
```



```{r, warning=F}
xAxis = list(
  title = "Subject",
  ticktext = combinedDf_Motoric$Subject, 
  tickvals = seq(1, bargap * nrow(combinedDf_Motoric), by=bargap),
  tickmode = "array"
)
combinedDf_Motoric$SubjectLevel <- seq(1, bargap * nrow(combinedDf_Motoric), by=bargap)

fig_Motoric <- plot_ly(combinedDf_Motoric, x = ~SubjectLevel, width=1000) %>%
  # add_trace(y = ~PP_Dev_2_Straight, name = 'Cognitive - Mean PP (Straight)', marker=COLOR_COGNITIVE) %>%
  # add_trace(y = ~PP_Dev_1_Turning, name = 'Normal - Mean PP (Turning)', marker=COLOR_COGNITIVE) %>% 
  # add_trace(y = ~PP_Dev_2_Turning, name = 'Cognitive - Mean PP (Turning)', marker=COLOR_COGNITIVE) %>% 
  add_trace(type="bar", y = ~Mean_PP_2_AccHigh, width=1.58, name = 'ΔPP after HA in CD', marker=COLOR_COGNITIVE_ACC) %>% 
  # add_trace(y = ~Mean_PP_2_AccLow, name = 'Coginitive - Mean PP (Low Accel.)', marker=COLOR_ACC) %>% 
  
  # add_trace(y = ~PP_Dev_3_Straight, name = 'Motoric - Mean PP (Straight)', marker=COLOR_MOTORIC) %>% 
  # add_trace(y = ~PP_Dev_3_Turning, name = 'Motoric - Mean PP (Turning)', marker=COLOR_MOTORIC) %>% 
  add_trace(type="bar", y = ~Mean_PP_3_AccHigh, width=1.58, name = "ΔPP after HA in MD", marker=COLOR_MOTORIC_ACC) %>% 
  # add_trace(y = ~Mean_PP_3_AccLow, name = 'Motoric - Mean PP (Low Accel.)', marker=COLOR_ACC) %>%
  
  # add_trace(y = ~PP_Prior, name = 'Failure - Prior PP', marker=COLOR_FAILURE_PRIOR) %>%
  add_trace(type="bar", y = ~PP_After, width=1.58, name = 'ΔPP after the catastrophic event', marker=COLOR_FAILURE) %>% 
  add_segments(x=-5, xend=bargap * nrow(combinedDf_NoStressor), y = THRESHOLD_MILD, yend = THRESHOLD_MILD, name="Otsu Threshold",
                           line=list(color="blue", dash = 'dot')) %>%
  # add_segments(x="#01", xend="#41", y = THRESHOLD_EXTREME, yend = THRESHOLD_EXTREME, name="Threshold: Extreme Change of PP",
  #                          line=list(color="darkred", dash = 'dot')) %>%
  layout(yaxis = yAxis, xaxis=xAxis, barmode = 'group', bargap=5, title=F)

htmltools::tagList(fig_Motoric)
```


```{r}
library(nlme)
combinedDf$Subject = as.factor(combinedDf$Subject)
combinedDf$Activity = as.factor(combinedDf$Activity)
combinedDf$PP_Dev_Group = ifelse(combinedDf$PP_Dev > THRESHOLD_MILD, 1, 0)
```

### Extract data for important features
```{r}
importantFeaturesDf <- combinedDf %>% select(Subject, Std_PP_3, PP_Dev_2_Turning, Activity, PP_Dev, PP_Dev_Group)
```

# Linear model with all variables
```{r}
linearModel1 <- lm(PP_After ~ 
              + PP_Dev_2_Straight
              + PP_Dev_3_Straight
              + PP_Dev_2_Turning
              + PP_Dev_3_Turning
              + Std_PP_2_Straight
              + Std_PP_3_Straight
              + Std_PP_2_Turning
              + Std_PP_3_Turning
              + PP_Prior
              + factor(ActivityEncoded), 
            data=combinedDf)

# anova(model)
summary(linearModel1)
plot(linearModel1)
```

```{r}
linearModel1 <- lm(PP_After ~ 
                Mean_PP_2_AccHigh
              + Mean_PP_2_AccLow
              + Mean_PP_3_AccHigh
              + Mean_PP_3_AccLow
              + Std_PP_2_AccHigh
              + Std_PP_2_AccLow
              + Std_PP_3_AccHigh
              + Std_PP_3_AccLow
              # + PP_Prior
              + factor(ActivityEncoded), 
            data=combinedDf)

# anova(model)
summary(linearModel1)
plot(linearModel1)
```

## With Prior
```{r}
linearModelWPrior <- lm(PP_After ~ 
                Mean_PP_2_AccHigh
              + Mean_PP_2_AccLow
              + Mean_PP_3_AccHigh
              + Mean_PP_3_AccLow
              + Std_PP_2_AccHigh
              + Std_PP_2_AccLow
              + Std_PP_3_AccHigh
              + Std_PP_3_AccLow
              + PP_Prior
              + factor(ActivityEncoded), 
            data=combinedDf)

# anova(model)
summary(linearModelWPrior)
plot(linearModelWPrior)
```

```{r}
linearModel3 <- lm(PP_After ~ 
                Mean_PP_2_AccHigh
              + Mean_PP_2_AccLow
              + Mean_PP_3_AccHigh
              + Mean_PP_3_AccLow
              # + PP_Prior
              + factor(ActivityEncoded), 
            data=combinedDf)

# anova(model)
summary(linearModel3)
plot(linearModel3)
```


```{r}
# Export the anova table
library(xtable)
lmCoeffs <- summary(linearModel3)$coefficients
lmAnova <- anova(linearModel3)

print(xtable(lmCoeffs, digits=c(0,5,5,5,5)))
print(xtable(lmAnova), digits=c(0,5,5,5,5))

```


```{r}
ppAfter <- combinedDf$PP_After
ppAfterArray <- matrix(ppAfter, nrow = 1,ncol = length(ppAfter))
  
thresholdPPAfter <- otsu(ppAfterArray, range=c(min(ppAfter), max(ppAfter))) # Expected Threshold > 0.042
print(paste0('Threshold: ', thresholdPPAfter))

selectedDf <- combinedDf %>% select(
                  "Subject", "Activity", "PP_After", # "PP_Prior",
                  "Mean_PP_2_AccHigh", "Mean_PP_3_AccHigh",
                  "Mean_PP_2_AccLow", "Mean_PP_3_AccLow",
                  # "Std_PP_2_AccHigh", "Std_PP_3_AccHigh",
                  # "Std_PP_2_AccLow", "Std_PP_3_AccLow"
                  )

selectedDf$Subject <- NULL
selectedDf$Activity_NO <- ifelse(selectedDf$Activity == "NO", 1, 0)
selectedDf$Activity_C <- ifelse(selectedDf$Activity == "C", 1, 0)
selectedDf$Activity_M <- ifelse(selectedDf$Activity == "M", 1, 0)
selectedDf$Activity <- NULL

# selectedDf$PP_Dev_1_Turning <- NULL
# selectedDf$Std_PP_2_Straight <- NULL
# selectedDf$Std_PP_2_Turning <- NULL
# selectedDf$Std_PP_3_Straight <- NULL
# selectedDf$Std_PP_3_Turning <- NULL
# 
# # According to Linear model
# selectedDf$PP_Dev_2_Straight <- abs(selectedDf$PP_Dev_2_Straight)
# selectedDf$PP_Dev_3_Straight <- abs(selectedDf$PP_Dev_3_Straight)
# selectedDf$PP_Dev_2_Turning <- abs(selectedDf$PP_Dev_2_Turning)
# selectedDf$PP_Dev_3_Turning <- abs(selectedDf$PP_Dev_3_Turning)
# selectedDf$PP_Prior <- abs(selectedDf$PP_Prior) # NULL

selectedDf$Class <- ifelse(selectedDf$PP_After >= thresholdPPAfter, T, F)
selectedDf$PP_After <- NULL

print(names(selectedDf))
```

```{r}
# library(mefa)
# combinedDf <- rep(combinedDf, 10) 
```

```{r}
set.seed(39)
n_folds <- 3
params <- param <- list(objective       = "binary:logistic", 
               booster          = "gbtree",
               eval_metric      = "auc",
               eta              = 0.1,
               max_depth        = 10,
               alpha            = 1,
               lambda           = 0,
               gamma            = 0.45,
               min_child_weight = 0.3,
               subsample        = 0.5,
               colsample_bytree = 1)
           
# XGBoost Model         
xgb_m <- xgb.cv(   params               = param,
                  data = as.matrix(selectedDf %>% select(-Class)) ,
                  label =  selectedDf$Class,
                  nrounds             = 100,
                  verbose             = F,
                  prediction          = T,
                  maximize            = F, # Change this value to F will help to run with more itineration
                  nfold               = n_folds,
                  metrics             = c("auc", "error"),
                  early_stopping_rounds = 50,
                  stratified            = T,
                  scale_pos_weight      = 1)

# xgb_m$evaluation_log[xgb_m$best_iteration,"test_auc_mean"]
xgb_m$evaluation_log[xgb_m$best_iteration,]

```

## Performance Metrics
```{r}
# Prediction
selectedDf$clsPred <- round(xgb_m$pred)

computePerformanceResults <- function(sdat){
  sdat = sdat[complete.cases(sdat),]
  acc = sum(sdat[,1] == sdat[,2])/nrow(sdat)
  conf_mat = table(sdat)
  specif = conf_mat[1,1]/sum(conf_mat[,1])
  sensiv = conf_mat[2,2]/sum(conf_mat[,2])
  preci =  conf_mat[2,2]/sum(conf_mat[2,])
  npv =    conf_mat[1,1]/sum(conf_mat[1,])
  return(c(acc,specif,sensiv,preci,npv))
}

# Get average performance
performance <- computePerformanceResults(selectedDf %>% select(Class, clsPred))
acc <- performance[1]
prec <- performance[4]
recall <- performance[3]
spec <- performance[2]
npv <- performance[5]
f1 <- (2 * recall * prec) / (recall + prec)
auc <- as.numeric(xgb_m$evaluation_log[xgb_m$best_iteration, "test_auc_mean"])

print(paste("Accuracy=", round(acc, 2)))
print(paste("Precision=", round(prec, 2)))
print(paste("Recall=", round(recall, 2)))
print(paste("Specificity=", round(spec, 2)))
print(paste("NPV=", round(npv, 2)))
print(paste("F1=", round(f1, 2)))
print(paste("AUC=", round(auc, 2)))
```

```{r}
# Importance
bst <- xgboost(   params               = param,
                  data = as.matrix(selectedDf %>% select(-c(Class, clsPred))) ,
                  label =  selectedDf$Class,
                  nrounds             = 100,
                  verbose             = F,
                  prediction          = T,
                  maximize            = F, # Change this value to F will help to run with more itineration
                  nfold               = n_folds,
                  metrics             = c("auc", "error"),
                  early_stopping_rounds = 50,
                  stratified            = T,
                  scale_pos_weight      = 1)
importanceDf <- xgb.importance(colnames(selectedDf %>% select(-c(Class, clsPred))), model = bst)
print(importanceDf)
```

```{r}
library(pROC)

dfROC <- pROC::roc(response = ifelse(selectedDf$Class==T, 1, 0),
               predictor = round(xgb_m$pred),
               levels=c(0, 1), direction = "<")

# it = which.max(xgb_m$evaluation_log$test_auc_mean)
# best.iter = xgb_m$evaluation_log$iter[it]
# best.iter 

plot(pROC::roc(response = ifelse(selectedDf$Class==T, 1, 0),
               predictor = round(xgb_m$pred),
               levels=c(0, 1), direction = "<"), 
     legacy.axes = TRUE,
     main="ROC Curve", 
     lwd=1.5) 
```


### Plot feature importance
```{r}
yAxis <- list(
  title = 'Importance',
  range=c(0.0, 1.0)
)
xAxis <- list(
  title = 'Feature'
)

importanceDf$Feature <- factor(importanceDf$Feature, levels = importanceDf[order(-Gain),]$Feature)
fig_Importance <- plot_ly(importanceDf, x = ~Feature, y = ~Gain, type = 'bar', name = 'Gain', width=600) %>%
  add_trace(y = ~Cover, name = 'Cover') %>% 
  add_trace(y = ~Frequency, name = 'Frequency') %>% 
  layout(yaxis = yAxis, xaxis=xAxis, barmode = 'group', title="Feature Importance") %>% 
  config(.Last.value, mathjax = 'cdn')

htmltools::tagList(fig_Importance)
```

```{r}
actualCluster <- data.frame(cbind(as.character(combinedDf$Subject), selectedDf$Class))
names(actualCluster) <- c("Subject", "Class")
actualCluster
# actualCluster[order(Class),]
```

```{r}
library(factoextra)
library(cluster)
clusteringDf <- combinedDf %>% select("Mean_PP_2_AccHigh", "Mean_PP_3_AccHigh") #(importanceDf$Feature[1:3])
rownames(selectedDf) <- paste0(combinedDf$Subject)
rownames(clusteringDf) <- paste0(combinedDf$Subject)
fit <- kmeans(clusteringDf, 3)
# clusplot(clusteringDf, fit$cluster, color=TRUE, shade=TRUE, labels=2, lines=0)
fviz_cluster(fit, data=selectedDf)
```


```{r}
library(dendextend)

NUMBER_OF_CLUSTERS = 4
CLUSTER_COLORS <- c("red", "blue", color_darkpink, color_darkpink)

color_darkpink = "#e75480"
CLUSTER_BRANCH_COLORS <- CLUSTER_COLORS[1:NUMBER_OF_CLUSTERS]
CLUSTER_LABEL_COLORS <- CLUSTER_COLORS[1:NUMBER_OF_CLUSTERS]

behavioralMatrixClustering <- as.matrix(clusteringDf)
rownames(behavioralMatrixClustering) <- paste0(combinedDf$Subject)
distMatrix <- dist(behavioralMatrixClustering, method="manhattan")
hresults <- distMatrix %>% hclust(method="complete")

hc <- hresults %>% 
      as.dendrogram %>%
      set("nodes_cex", NUMBER_OF_CLUSTERS) %>%
      set("labels_col", value = CLUSTER_LABEL_COLORS, k=NUMBER_OF_CLUSTERS) %>%
      # set("leaves_pch", 19) %>%
      # set("leaves_col", value = c("gray"), k=NUMBER_OF_CLUSTERS) %>%    
      set("branches_k_color", value=CLUSTER_BRANCH_COLORS, k=NUMBER_OF_CLUSTERS)

plot(hc)
legend("topright", 
     title="Drive=Failure \nChange of Arousal",
     legend = c("Exceptional Increase" , "Noticable Increase" , "No-change or Decrease"), 
     col = c("red", "pink" , "blue"),
     pch = c(20,20,20), bty = "n",  pt.cex = 1.5, cex = 0.8 , 
     text.col = "black", horiz = FALSE, inset = c(0.0, 0.1))
```

```{r}
NUMBER_OF_CLUSTERS <- 2
CLUSTER_COLORS <- c("red", "blue", color_darkpink, color_darkpink)

color_darkpink = "#e75480"
CLUSTER_BRANCH_COLORS <- CLUSTER_COLORS[1:NUMBER_OF_CLUSTERS]
CLUSTER_LABEL_COLORS <- CLUSTER_COLORS[1:NUMBER_OF_CLUSTERS]

combinedDf$isM <- ifelse(combinedDf$Activity == "M", 0.1, 0)
combinedDf$isC <- ifelse(combinedDf$Activity == "C", 0.1, 0)
combinedDf$isN <- ifelse(combinedDf$Activity == "NO", 0.1, 0)

behavioralMatrixClustering <- as.matrix(combinedDf %>% select("PP_After", "isM", "isC", "isN"))
rownames(behavioralMatrixClustering) <- paste0(combinedDf$Subject)
distMatrix <- dist(behavioralMatrixClustering, method="manhattan")
hresults <- distMatrix %>% hclust(method="complete")

hc <- hresults %>% 
      as.dendrogram %>%
      set("nodes_cex", NUMBER_OF_CLUSTERS) %>%
      set("labels_col", value = CLUSTER_LABEL_COLORS, k=NUMBER_OF_CLUSTERS) %>%
      # set("leaves_pch", 19) %>%
      # set("leaves_col", value = c("gray"), k=NUMBER_OF_CLUSTERS) %>%    
      set("branches_k_color", value=CLUSTER_BRANCH_COLORS, k=NUMBER_OF_CLUSTERS)

plot(hc)
legend("topright", 
     title="Drive=Failure \nChange of Arousal",
     legend = c("Exceptional Increase" , "Noticable Increase" , "No-change or Decrease"), 
     col = c("red", "pink" , "blue"),
     pch = c(20,20,20), bty = "n",  pt.cex = 1.5, cex = 0.8 , 
     text.col = "black", horiz = FALSE, inset = c(0.0, 0.1))
```